In this document we present the joint analysis of the PASS1A metabolomics datasets.
Load the data from the cloud, including: phenotypic data, metabolomic datasets, and metabolomics dictionary.
# load the data and functions to the session
# helper functions for unsupervised data matrix normalization
source("~/Desktop/repos/motrpac-bic-norm-qc/tools/unsupervised_normalization_functions.R")
# basic functions for reading data objects from GCP
source("~/Desktop/repos/motrpac-bic-norm-qc/tools/gcp_functions.R")
# helper functions for data reformat/reshape
source("~/Desktop/repos/motrpac-bic-norm-qc/tools/data_aux_functions.R")
# helper functions for reading and checking the metabolomics data from the cloud
source("~/Desktop/repos/motrpac-bic-norm-qc/tools/metabolomics_data_parsing_functions.R")
# functions for association analysis
source("~/Desktop/repos/motrpac-bic-norm-qc/tools/association_analysis_methods.R")
# functions for classification/regression and cross-validation
source("~/Desktop/repos/motrpac/tools/prediction_ml_tools.R")
library(metafor) # for meta-analysis
library(randomForest) # for classification tests
library(VennDiagram) # for overlaps
require(gridExtra) # for the venn diagrams
library(corrplot) # for overlap/correlation matrices
library(ggplot2)
# Load the dmaqc data
merged_dmaqc_data = load_from_bucket("merged_dmaqc_data2019-10-15.RData",
"gs://bic_data_analysis/pass1a/pheno_dmaqc/",T)
merged_dmaqc_data = merged_dmaqc_data[[1]]
rownames(merged_dmaqc_data) = as.character(merged_dmaqc_data$vial_label)
# define the tissue variable
merged_dmaqc_data$tissue = merged_dmaqc_data$sampletypedescription
# define the time to freeze variable
merged_dmaqc_data$time_to_freeze = merged_dmaqc_data$calculated.variables.time_death_to_collect_min +
merged_dmaqc_data$calculated.variables.time_collect_to_freeze_min
# blood freeze times
blood_samples =
merged_dmaqc_data$specimen.processing.sampletypedescription ==
"EDTA Plasma"
blood_freeze_time =
as.difftime(merged_dmaqc_data$specimen.processing.t_freeze,units = "mins") -
as.difftime(merged_dmaqc_data$specimen.processing.t_edtaspin,units="mins")
blood_freeze_time = as.numeric(blood_freeze_time)
time_to_freeze = merged_dmaqc_data$time_to_freeze[blood_samples] =
blood_freeze_time[blood_samples]
# Load our parsed metabolomics datasets
metabolomics_bucket_obj = load_from_bucket(
file = "metabolomics_parsed_datasets_pass1a_external_v2_04112020.RData",
bucket = "gs://bic_data_analysis/pass1a/metabolomics/")
metabolomics_parsed_datasets = metabolomics_bucket_obj$metabolomics_parsed_datasets
# destination for selected figures, tables and RData files
local_destination = "~/Desktop/pass1a_met_files/" # change if you want another dir
try(dir.create(local_destination))
analysis_out_files_dest = paste(local_destination,"/analysis_out_files/",sep="")
try(dir.create(analysis_out_files_dest))
Define the parameters for the analyses below including which covariates to take and what normalization methods to consider.
# Define the variables to be adjusted for:
biospec_cols = c(
"acute.test.distance",
"calculated.variables.time_to_freeze",
"bid" # required for matching datasets
)
differential_analysis_cols = c(
"animal.registration.sex",
"animal.key.timepoint",
"animal.key.is_control",
"sample_order"
)
# normalization method pairs to test
# specifies which normalized data entry to take
# first is for targeted datsets, second is for untargeted
tar_vs_untar_norm_pairs = data.frame(
tar_nrom = c("norm:none;metab:none","norm:none;metab:none"),
untar_norm = c("norm:none;metab:none","norm:med;metab:log"),
stringsAsFactors = F
)
# specify time points for pairwise differential analysis
tp_pairs_for_de = data.frame(
tp1 = c(1.0,4.0,7.0,24.0,7.0,0.0),
tp1_is_control = c(F,F,F,F,F,T),
tp2 = c(0.0,1.0,4.0,0.0,7.0,7.0),
tp2_is_control = c(F,F,F,T,T,T)
)
# load the data
metabolomics_processed_datasets = load_from_bucket(
file="metabolomics_processed_datasets04072020.RData",
bucket = "gs://bic_data_analysis/pass1a/metabolomics/"
)[[1]]
#' Transform the row names to motrpac names whenever possible.
get_motrpac_dict_rownames<-function(x,row_annot_x){
if(length(setdiff(rownames(x),row_annot_x[,1]))>0){
stop("rownames in data and annotation do not match")
}
if(length(setdiff(row_annot_x[,1],rownames(x)))>0){
warning("annotation has additional metabolites")
}
if(! "motrpac_comp_name" %in% colnames(row_annot_x)){
stop("no motrpac_comp_name column in annotation")
}
row_annot_x = row_annot_x[rownames(x),]
y = row_annot_x[,"motrpac_comp_name"]
inds = !is.na(y) & y!=""
rnames = rownames(x)
rnames[inds] = y[inds]
return(rnames)
}
# Reduce the metadata to the selected columns, transform row names
# to motrpac ids and add bid-based names
for(currname in names(metabolomics_processed_datasets)){
d = metabolomics_processed_datasets[[currname]]
curr_data = d$normalized_data[[1]]
# organize the metadata
# some covariates may not appear in the data because they had
# too many NAs
covered_covs = intersect(union(biospec_cols,differential_analysis_cols),
colnames(d$sample_meta))
curr_meta = d$sample_meta[colnames(curr_data),covered_covs]
# remove metadata variables with too many NAs
na_counts = apply(is.na(curr_meta),2,sum)
curr_meta = curr_meta[,na_counts/nrow(curr_meta) < 0.1]
motrpac_met_names = get_motrpac_dict_rownames(d$normalized_data[[1]],d$row_annot)
names(motrpac_met_names) = rownames(d$normalized_data[[1]])
# add the new fields
d$covs = curr_meta
d$motrpac_met_names = motrpac_met_names
metabolomics_processed_datasets[[currname]] = d
}
# Transform sample id to bid
for(currname in names(metabolomics_processed_datasets)){
d = metabolomics_processed_datasets[[currname]]
bids = as.character(d$sample_meta[colnames(d$normalized_data[[1]]),"bid"])
if(any(bids!=d$sample_meta[,"bid"])){
print(paste("Warning in dataset",
currname,"sample order in data and metadata do not match"))
}
if(length(unique(bids))==ncol(d$normalized_data[[1]])){
rownames(d$sample_meta) = bids
for(j in 1:length(d$normalized_data)){
colnames(d$normalized_data[[j]]) = bids
}
}
else{
print(paste("there are ",nrow(d$sample_meta),"samples in dataset",currname))
print(paste(" => bid column names are not unique in:",currname,", merging"))
oldmeta = d$sample_meta
newmeta_inds = c()
for(bid in unique(bids)){
newmeta_inds = c(newmeta_inds,which(bids==bid)[1])
}
newmeta = oldmeta[newmeta_inds,]
rownames(newmeta) = as.character(newmeta[,"bid"])
d$sample_meta = newmeta
for(j in 1:length(d$normalized_data)){
d$normalized_data[[j]] = aggregate_repeated_samples(
d$normalized_data[[j]],bids,na.rm=T
)
if(!(all(colnames(d$normalized_data[[j]]) %in%
rownames(newmeta)))){
print(paste("!!! Error in merging samples by bid in dataset",currname))
}
d$normalized_data[[j]] = d$normalized_data[[j]][
,rownames(newmeta)]
}
d$motrpac_met_names = d$motrpac_met_names[rownames(d$normalized_data[[1]])]
print("done")
}
metabolomics_processed_datasets[[currname]] = d
}
## [1] "there are 156 samples in dataset gastrocnemius,tar,ac_mayo,mayo"
## [1] " => bid column names are not unique in: gastrocnemius,tar,ac_mayo,mayo , merging"
## [1] "done"
## [1] "there are 156 samples in dataset gastrocnemius,tar,amines,mayo"
## [1] " => bid column names are not unique in: gastrocnemius,tar,amines,mayo , merging"
## [1] "done"
## [1] "there are 156 samples in dataset gastrocnemius,tar,cer_mayo,mayo"
## [1] " => bid column names are not unique in: gastrocnemius,tar,cer_mayo,mayo , merging"
## [1] "done"
## [1] "there are 156 samples in dataset gastrocnemius,tar,tca,mayo"
## [1] " => bid column names are not unique in: gastrocnemius,tar,tca,mayo , merging"
## [1] "done"
## [1] "there are 156 samples in dataset liver,tar,ac_mayo,mayo"
## [1] " => bid column names are not unique in: liver,tar,ac_mayo,mayo , merging"
## [1] "done"
## [1] "there are 156 samples in dataset liver,tar,amines,mayo"
## [1] " => bid column names are not unique in: liver,tar,amines,mayo , merging"
## [1] "done"
## [1] "there are 156 samples in dataset liver,tar,cer_mayo,mayo"
## [1] " => bid column names are not unique in: liver,tar,cer_mayo,mayo , merging"
## [1] "done"
## [1] "there are 156 samples in dataset liver,tar,tca,mayo"
## [1] " => bid column names are not unique in: liver,tar,tca,mayo , merging"
## [1] "done"
## [1] "there are 156 samples in dataset white_adipose,tar,ac_mayo,mayo"
## [1] " => bid column names are not unique in: white_adipose,tar,ac_mayo,mayo , merging"
## [1] "done"
## [1] "there are 156 samples in dataset white_adipose,tar,amines,mayo"
## [1] " => bid column names are not unique in: white_adipose,tar,amines,mayo , merging"
## [1] "done"
## [1] "there are 156 samples in dataset white_adipose,tar,cer_mayo,mayo"
## [1] " => bid column names are not unique in: white_adipose,tar,cer_mayo,mayo , merging"
## [1] "done"
## [1] "there are 156 samples in dataset white_adipose,tar,tca,mayo"
## [1] " => bid column names are not unique in: white_adipose,tar,tca,mayo , merging"
## [1] "done"
# Get the named datasets separately - easier to work with later
processed_named_datasets = list()
for(currname in names(metabolomics_processed_datasets)){
d = metabolomics_processed_datasets[[currname]]
inds = !is.na(d$row_annot$motrpac_comp_name)
d$row_annot = d$row_annot[inds,]
d$motrpac_met_names = d$motrpac_met_names[inds]
if(!all(d$motrpac_met_names == d$row_annot$motrpac_comp_name)){
print(paste("Error: row name problems in dataset ",currname))
}
for(j in 1:length(d$normalized_data)){
d$normalized_data[[j]] = d$normalized_data[[j]][inds,]
if(!all(rownames(d$normalized_data[[j]]) == rownames(d$row_annot))){
print(paste("Error: row name problems in dataset ",currname))
}
rownames(d$normalized_data[[j]]) = d$motrpac_met_names
}
processed_named_datasets[[currname]] = d
}
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 7801770 416.7 14154850 756.0 NA 12171078 650.1
## Vcells 60886148 464.6 89054561 679.5 102400 68882124 525.6
Compare the overlap between the named metabolites. This is computed for all dataset pairs, but tissue-specific plots are presented for simplicity.
dataset2named_metabolites = lapply(processed_named_datasets,
function(x)rownames(x$normalized_data[[1]]))
Nd = length(processed_named_datasets)
overlap_matrix = matrix(0,Nd,Nd,dimnames = list(
names(processed_named_datasets),names(processed_named_datasets)
))
pairwise_shared_metabolites = list()
for(nn1 in names(processed_named_datasets)){
pairwise_shared_metabolites[[nn1]] = list()
for(nn2 in names(processed_named_datasets)){
shared_metabolites = intersect(
dataset2named_metabolites[[nn1]],dataset2named_metabolites[[nn2]])
pairwise_shared_metabolites[[nn1]][[nn2]] = shared_metabolites
overlap_matrix[nn1,nn2] = length(shared_metabolites)
}
}
dataset2is_targeted =
sapply(processed_named_datasets,function(x)x$is_targeted)
dataset2tissue =
sapply(processed_named_datasets,function(x)x$tissue)
tissue2overlap_matrix = list()
met_lists_coverage = c()
for(tissue in unique(dataset2tissue)){
currdatasets = names(dataset2tissue)[dataset2tissue==tissue]
if(length(currdatasets)<2){next}
tissue2overlap_matrix[[tissue]] =
overlap_matrix[currdatasets,currdatasets]
all_tissue_mets = unique(unlist(dataset2named_metabolites))
all_targeted_mets = unique(
unlist(dataset2named_metabolites[dataset2is_targeted & dataset2tissue==tissue])
)
all_untargeted_mets = unique(
unlist(dataset2named_metabolites[!dataset2is_targeted & dataset2tissue==tissue])
)
currv = rep("tar_only",length(all_tissue_mets))
in_tar = all_tissue_mets %in% all_targeted_mets
in_untar = all_tissue_mets %in% all_untargeted_mets
currv[in_untar] = "untar_only"
currv[in_untar & in_tar] = "both"
currv = paste(tissue,currv,sep=",")
met_lists_coverage = rbind(met_lists_coverage,
cbind(all_tissue_mets,currv))
}
save(overlap_matrix,tissue2overlap_matrix,met_lists_coverage,
dataset2named_metabolites, pairwise_shared_metabolites,
file = paste(analysis_out_files_dest,
"plarform_named_metabolite_overlaps.RData",sep=""))
# Show the pairwise overlap and the overall Venn diagram
# comparing targeted to untargeted
for (tissue in names(tissue2overlap_matrix)){
rownames(tissue2overlap_matrix[[tissue]]) = gsub(
paste0(tissue,","),"",rownames(tissue2overlap_matrix[[tissue]])
)
colnames(tissue2overlap_matrix[[tissue]]) = rownames(tissue2overlap_matrix[[tissue]])
corrplot(tissue2overlap_matrix[[tissue]],
is.corr = F,method = "number",number.cex = 0.7,cl.length = 11,
col= gray.colors(100,rev=T,start=0,end=1,gamma=1))
m = met_lists_coverage[grepl(tissue,met_lists_coverage[,2]),]
inter_area = sum(grepl("both",m[,2]))
tar_area = sum(grepl(",tar_only",m[,2])) + inter_area
untar_area = sum(grepl(",untar_only",m[,2])) + inter_area
venng = draw.pairwise.venn(tar_area,untar_area,inter_area,
c("Targeted","Untargeted"),lty = rep("blank",2),
fill = c("pink", "cyan"), alpha = rep(0.5, 2),
cat.dist = rep(0.01, 2),ind=F)
grid.arrange(gTree(children=venng),
top=paste(tissue,"measured named metabolites",sep=", "))
}
Compute the summary statistics for each combination of a metabolite, normalization method, and a pair of rat groups.
# For each normalization option (a pair from the list above) we compute
# all pairwise correlation matrices of the overlapping metabolites
single_metabolite_de = c()
for(currname in names(processed_named_datasets)){
d = processed_named_datasets[[currname]]
for(norm_method in names(d$normalized_data)){
x = d$normalized_data[[norm_method]]
# if data was not log-transformed then scale it
if(!grepl("log",norm_method)){
x = t(scale(t(x),center = T,scale = T))
}
x_meta = d$sample_meta
covs = data.frame(
sex = x_meta$animal.registration.sex,
sample_order = x_meta$sample_order
)
for(j in 1:nrow(tp_pairs_for_de)){
currres = t(apply(
x,1,simple_pairwise_differential_abundance,
tps = x_meta$animal.key.timepoint,
is_control = x_meta$animal.key.is_control,
tp1 = tp_pairs_for_de[j,1],tp1_iscontrol = tp_pairs_for_de[j,2],
tp2 = tp_pairs_for_de[j,3],tp2_iscontrol = tp_pairs_for_de[j,4],
covs = covs
))
if(!all(rownames(x)==rownames(currres))){
print("Error in rownames in DE results")
}
currres = data.frame(currres,stringsAsFactors = F,check.names = F)
currres$dataset = currname
currres$is_targeted = d$is_targeted
currres$tissue = d$tissue
currres$norm = norm_method
currres$is_log_scale = grepl("log",norm_method)
currres = cbind(currres,
data.frame(metabolite=rownames(x),stringsAsFactors = F,check.names = F))
rownames(currres) = NULL
single_metabolite_de = rbind(single_metabolite_de,currres)
}
# repeat the calculation if data was logged
if(grepl("log",norm_method)){
x = 2^x
x = t(scale(t(x),center = T,scale = T))
for(j in 1:nrow(tp_pairs_for_de)){
currres = t(apply(
x,1,simple_pairwise_differential_abundance,
tps = x_meta$animal.key.timepoint,
is_control = x_meta$animal.key.is_control,
tp1 = tp_pairs_for_de[j,1],tp1_iscontrol = tp_pairs_for_de[j,2],
tp2 = tp_pairs_for_de[j,3],tp2_iscontrol = tp_pairs_for_de[j,4],
covs = covs
))
currres = data.frame(currres)
currres$dataset = currname
currres$is_targeted = d$is_targeted
currres$tissue = d$tissue
currres$norm = norm_method
currres$is_log_scale = F
currres = cbind(currres,
data.frame(metabolite=rownames(x),stringsAsFactors = F,check.names = F))
rownames(currres) = NULL
single_metabolite_de = rbind(single_metabolite_de,currres)
}
}
}
}
single_metabolite_de = single_metabolite_de[!is.na(single_metabolite_de$Pvalue),]
table(single_metabolite_de$norm,single_metabolite_de$is_log_scale)
##
## FALSE TRUE
## norm:med;metab:log 24000 24000
## norm:none;metab:log 7008 6986
## norm:none;metab:none 31007 0
sapply(single_metabolite_de,class)
## tp1 tp1_isctrl tp2 tp2_isctrl Est Std
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## Tstat Pvalue dataset is_targeted tissue norm
## "numeric" "numeric" "character" "logical" "character" "character"
## is_log_scale metabolite
## "logical" "character"
tp_analysis_name = apply(single_metabolite_de[,1:4],1,paste,collapse=",")
norm_method2comparison_results = list()
for(norm_ind in 1:nrow(tar_vs_untar_norm_pairs)){
tar_norm = tar_vs_untar_norm_pairs[norm_ind,1]
untar_norm = tar_vs_untar_norm_pairs[norm_ind,2]
tar_is_log = grepl("log",tar_norm)
untar_is_log = grepl("log",tar_norm)
if(!tar_is_log || !untar_is_log){
untar_is_log = F
tar_is_log = F
}
meta_analysis_stats = list()
for(tissue in tissues){
for(tp in unique(tp_analysis_name)){
curr_subset = single_metabolite_de[
tp_analysis_name == tp &
single_metabolite_de$tissue == tissue,]
curr_subset = curr_subset[
(curr_subset$is_targeted & curr_subset$norm == tar_norm |
!curr_subset$is_targeted & curr_subset$norm == untar_norm) &
(curr_subset$is_targeted & curr_subset$is_log_scale == tar_is_log |
!curr_subset$is_targeted & curr_subset$is_log_scale == untar_is_log),]
for(metabolite in unique(curr_subset$metabolite)){
curr_met_data = curr_subset[curr_subset$metabolite==metabolite,]
if(is.null(dim(curr_met_data))||nrow(curr_met_data)==1){next}
if(length(unique(curr_met_data$is_targeted))<2){next}
curr_met_data$var = curr_met_data$Std^2
re_model1 = NULL;re_model2=NULL
re_model_tar = NULL;re_model_untar = NULL
try({re_model1 = rma.uni(curr_met_data$Est,curr_met_data$var,method="FE")})
try({re_model2 = rma.mv(curr_met_data$Est,curr_met_data$var,
mods=curr_met_data$is_targeted,method="FE")})
try({re_model_tar = rma.uni(
curr_met_data[curr_met_data$is_targeted==1,"Est"],
curr_met_data[curr_met_data$is_targeted==1,"var"],
method="FE"
)})
try({re_model_untar = rma.uni(
curr_met_data[curr_met_data$is_targeted==0,"Est"],
curr_met_data[curr_met_data$is_targeted==0,"var"],
method="FE"
)})
meta_analysis_stats[[paste(metabolite,tissue,tp,sep=";")]] =
list(curr_met_data=curr_met_data,re_model1=re_model1,
re_model2 = re_model2,re_model_tar=re_model_tar,
re_model_untar = re_model_untar,tar_is_log=tar_is_log,
untar_is_log=untar_is_log)
}
}
}
norm_method2comparison_results[[paste(tar_norm,untar_norm,sep=";")]] = meta_analysis_stats
}
# Add the results to the single_metabolite_de data frame
single_met_de_meta_anal = c()
for(norm_pair in names(norm_method2comparison_results)){
l = norm_method2comparison_results[[norm_pair]]
currres = c()
for(analysis_name in names(l)){
arr = strsplit(analysis_name,split=";")[[1]]
met = arr[1]
tissue = arr[2]
tps = strsplit(arr[3],split=",")[[1]]
tp1 = tps[1]
tp1_islog = tps[2]
tp2 = tps[3]
tp2_islog = tps[4]
is_log_scale = length(gregexpr("log",norm_pair))==2
# targeted meta-analysis
obj = l[[analysis_name]]$re_model_tar
stats = c(obj$beta,obj$se,obj$zval,obj$pval)
v = c(tp1,tp1_islog,tp2,tp2_islog,stats,
"tar_meta_analysis",TRUE,tissue,norm_pair,is_log_scale,met)
names(v) = colnames(single_metabolite_de)
currres = rbind(currres,v)
# untargeted meta-analysis
obj = l[[analysis_name]]$re_model_untar
stats = c(obj$beta,obj$se,obj$zval,obj$pval)
v = c(tp1,tp1_islog,tp2,tp2_islog,stats,
"untar_meta_analysis",TRUE,tissue,norm_pair,is_log_scale,met)
names(v) = colnames(single_metabolite_de)
currres = rbind(currres,v)
# all meta-analysis
obj = l[[analysis_name]]$re_model1
stats = c(obj$beta,obj$se,obj$zval,obj$pval)
v = c(tp1,tp1_islog,tp2,tp2_islog,stats,
"meta_analysis",TRUE,tissue,norm_pair,is_log_scale,met)
names(v) = colnames(single_metabolite_de)
currres = rbind(currres,v)
}
single_met_de_meta_anal = rbind(single_met_de_meta_anal,currres)
}
dim(single_met_de_meta_anal)
## [1] 14688 14
single_met_de_meta_anal = data.frame(single_met_de_meta_anal,stringsAsFactors = F)
# reformat the data frame
for(j in 1:ncol(single_met_de_meta_anal)){
mode(single_met_de_meta_anal[[j]]) = mode(single_metabolite_de[[j]])
}
single_met_de_meta_anal[1:3,]
single_metabolite_de = rbind(single_metabolite_de,single_met_de_meta_anal)
save(norm_method2comparison_results,
file = paste(analysis_out_files_dest,
"norm_method2comparison_results.RData",sep=""))
Get the overlap of results per tissue and normalization method. The overlap matrices show overlap of metabolite sets from each platform. A discovery is a metabolite with \(p<p_thr\) in at least one of the group pairwise comparisons.
get_met_overlap_matrix_from_df<-function(df){
met_list = list()
for(dataset in unique(df$dataset)){
met_list[[dataset]] = unique(df[df$dataset==dataset,"metabolite"])
}
n = length(met_list)
m = matrix(0,n,n,dimnames = list(names(met_list),names(met_list)))
for(i in 1:n){
for(j in 1:n){
m[i,j] = length(intersect(met_list[[i]],met_list[[j]]))
}
}
return(m)
}
pthr = 0.001
tissues = unique(sapply(processed_named_datasets,function(x)x$tissue))
for(norm_ind in 1:nrow(tar_vs_untar_norm_pairs)){
tar_norm = tar_vs_untar_norm_pairs[norm_ind,1]
untar_norm = tar_vs_untar_norm_pairs[norm_ind,2]
tar_is_log = grepl("log",tar_norm)
untar_is_log = grepl("log",tar_norm)
if(!tar_is_log || !untar_is_log){
untar_is_log = F
tar_is_log = F
}
for(tissue in tissues){
curr_inds = single_metabolite_de$Pvalue < pthr &
single_metabolite_de$tissue == tissue &
(single_metabolite_de$is_targeted & single_metabolite_de$norm == tar_norm |
!single_metabolite_de$is_targeted & single_metabolite_de$norm == untar_norm) &
(single_metabolite_de$is_targeted & single_metabolite_de$is_log_scale == tar_is_log |
!single_metabolite_de$is_targeted & single_metabolite_de$is_log_scale == untar_is_log)
# add the meta-analyses
curr_inds = curr_inds |
( single_metabolite_de$Pvalue < pthr &
single_metabolite_de$tissue == tissue &
grepl("meta_analysis",single_metabolite_de$dataset) &
single_metabolite_de$norm == paste(tar_norm,untar_norm,sep=";")
)
# curr_inds = curr_inds & grepl("Car",single_metabolite_de$metabolite)
if(sum(curr_inds)==0){next}
curr_overlaps = get_met_overlap_matrix_from_df(single_metabolite_de[curr_inds,])
if(length(curr_overlaps) < 2){next}
rownames(curr_overlaps) = gsub(paste0(tissue,","),"",rownames(curr_overlaps))
colnames(curr_overlaps) = rownames(curr_overlaps)
corrplot(curr_overlaps,
is.corr = F,method = "number",number.cex = 0.8,cl.length = 11,
col= gray.colors(100,rev=T,start=0,end=1,gamma=1))
mtext(side=3,cex=1.1,adj=0.05,
paste(tissue,paste0("tar:",tar_norm),
paste0("untar:",untar_norm),sep="\n"))
}
}
Here are the results for lactate in plasma and ACs in gastrocnemius.
meta_analysis_stats = norm_method2comparison_results[[1]]
lact_res = meta_analysis_stats[
grepl("lact",names(meta_analysis_stats),ignore.case = T) &
grepl("plasma",names(meta_analysis_stats),ignore.case = T)
]
for(lact_example in names(lact_res)){
curr_labels = gsub("plasma,","",
meta_analysis_stats[[lact_example]][[1]][,"dataset"])
forest(meta_analysis_stats[[lact_example]]$re_model1,
slab = curr_labels,
main = lact_example,xlab = "Log fc",
col = "blue",cex = 1.1)
}
Car_res = meta_analysis_stats[
grepl("Car",names(meta_analysis_stats),ignore.case = T) &
grepl("gastroc",names(meta_analysis_stats),ignore.case = T)
]
for(car_example in names(Car_res)){
if(meta_analysis_stats[[car_example]]$re_model_tar$pval > 0.001){next}
curr_labels = gsub("gastrocnemius,","",
meta_analysis_stats[[car_example]][[1]][,"dataset"])
forest(meta_analysis_stats[[car_example]]$re_model1,
slab = curr_labels,
main = car_example,xlab = "Log fc",
col = "blue",cex = 1.1)
}
From the plots above we take the most extreme examples and examine their forest plots.
meta_analysis_stats = norm_method2comparison_results[[1]]
# P-value for the difference between targeted and untargeted
targeted_diff_p =
sapply(meta_analysis_stats,function(x)x$re_model2$pval[2])
# P-values - targeted vs. untargeted
pvals_tar = sapply(meta_analysis_stats,function(x)x$re_model_tar$pval)
pvals_untar = sapply(meta_analysis_stats,function(x)x$re_model_untar$pval)
pvals_untar = unlist(pvals_untar[sapply(pvals_untar,length)>0])
agree_example = names(sample(which(pvals_tar< 1e-5 & pvals_untar < 1e-5 &
targeted_diff_p > 0.1))[1])
simplify_labels_for_forest<-function(s){
s = gsub(",untargeted","",s)
tissue = strsplit(s,split=",")[[1]][1]
s = gsub(paste(tissue,",",sep=""),"",s)
return(s)
}
forest(meta_analysis_stats[[agree_example]]$re_model1,
slab = simplify_labels_for_forest(
meta_analysis_stats[[agree_example]][[1]][,"dataset"]),
main = paste(agree_example,"significant in both, tar and untar agree",sep="\n"),
xlab = "Log fc",col = "blue")
agree_p_disagree_beta = names(sample(which(pvals_tar< 1e-5 & pvals_untar < 1e-5 &
targeted_diff_p < 0.01))[1])
forest(meta_analysis_stats[[agree_p_disagree_beta]]$re_model1,
slab = simplify_labels_for_forest(
meta_analysis_stats[[agree_p_disagree_beta]][[1]][,"dataset"]),
main = paste(agree_p_disagree_beta,
"significant in both, tar and untar disagree",sep="\n"),
xlab = "Log fc",col = "blue")
disagree_example1 = names(sample(which(pvals_tar< 1e-5 & pvals_untar >0.1))[1])
forest(meta_analysis_stats[[disagree_example1]]$re_model1,
slab = simplify_labels_for_forest(
meta_analysis_stats[[disagree_example1]][[1]][,"dataset"]),
main = paste(disagree_example1,
"significant targeted, tar and untar disagree",sep="\n"),
xlab = "Log fc",col = "blue")
disagree_example2 = names(sample(which(pvals_tar > 0.1 & pvals_untar < 1e-5))[1])
forest(meta_analysis_stats[[disagree_example2]]$re_model1,
slab = simplify_labels_for_forest(
meta_analysis_stats[[disagree_example2]][[1]][,"dataset"]),
main = paste(disagree_example2,
"significant in untargeted, tar and untar disagree",sep="\n"),
xlab = "Log fc",col = "blue")
Plot some of Duke’s results - should fit their presentation (see shared drive).
d = metabolomics_processed_datasets$`plasma,tar,ac_duke,duke`
df = data.frame(
car18_1 = d$normalized_data[[2]]["Car(18:1)",],
sex = d$sample_meta$animal.registration.sex,
time = d$sample_meta$animal.key.timepoint
)
# this should fit slide 6 in the Duke's pdf
boxplot(car18_1~time,data=df[df$sex==2,],main = "Car18:1 data in males, plasma")
d = metabolomics_processed_datasets$`gastrocnemius,tar,ka,duke`
df = data.frame(
ketoleucine = d$normalized_data[[2]]["ketoleucine",],
sex = d$sample_meta$animal.registration.sex,
time = d$sample_meta$animal.key.timepoint
)
# this should fit slide 13 in the Duke's pdf
boxplot(ketoleucine~time,data=df[df$sex==1,])
pthr = 0.001
tar_vs_untar_correlations = c()
for(tissue in tissues){
for(pair_name in names(norm_method2comparison_results)){
meta_analysis_stats =
norm_method2comparison_results[[pair_name]]
meta_analysis_stats = meta_analysis_stats[
grepl(tissue,names(meta_analysis_stats))
]
if(is.null(meta_analysis_stats) || length(meta_analysis_stats)==0){next}
# P-value for the difference between targeted and untargeted
targeted_diff_p =
sapply(meta_analysis_stats,function(x)x$re_model2$pval[2])
# P-values - targeted vs. untargeted
pvals_tar = sapply(meta_analysis_stats,function(x)x$re_model_tar$pval)
pvals_untar = sapply(meta_analysis_stats,function(x)x$re_model_untar$pval)
pvals_untar = unlist(pvals_untar[sapply(pvals_untar,length)>0])
significant_in = rep("None",length(pvals_untar))
significant_in[pvals_tar<pthr] = "Targeted"
significant_in[pvals_untar<pthr] = "Untargeted"
significant_in[pvals_tar<pthr & pvals_untar<pthr] = "Both"
significant_diff = targeted_diff_p<pthr
rho = cor(-log10(pvals_tar),-log10(pvals_untar),method = "pearson")
# Betas - targeted vs. untargeted
betas_tar =
sapply(meta_analysis_stats,function(x)x$re_model_tar$beta[1,1])
betas_untar =
sapply(meta_analysis_stats,function(x)x$re_model_untar$beta[1,1])
betas_untar = unlist(betas_untar[sapply(betas_untar,length)>0])
df = data.frame(
targeted = betas_tar,
untargeted = betas_untar,
significant_in = significant_in,
significant_diff = significant_diff
)
rho_beta = cor(betas_untar,betas_tar,method = "pearson")
rhop = cor.test(betas_untar,betas_tar,method = "pearson")$p.value
print(
ggplot(df, aes(x=targeted, y=untargeted,
shape=significant_diff, color=significant_in)) +
geom_point() + geom_abline(slope=1,intercept = 0) +
ggtitle(paste(tissue, pair_name,
"effects, rho=:",format(rho_beta,digits=2)))
)
tar_vs_untar_correlations = rbind(tar_vs_untar_correlations,
c(tissue,pair_name,rho,rho_beta))
# draw a venn diagram
inter_area = sum(significant_in=="Both")
tar_area = sum(significant_in=="Targeted") + inter_area
untar_area = sum(significant_in=="Untargeted") + inter_area
subt = paste("Not significant in both:",table(significant_in)["None"])
ss = paste(sample(names(pvals_untar)[significant_in=="Targeted"])[1:10],collapse=";")
ss = gsub(paste(",",tissue,sep=""),"",ss)
# ss
ss = paste(sample(names(pvals_untar)[significant_in=="Untargeted"])[1:10],collapse=";")
ss = gsub(paste(",",tissue,sep=""),"",ss)
# ss
venng = draw.pairwise.venn(tar_area,untar_area,inter_area,
c("Targeted","Untargeted"),lty = rep("blank",2),
fill = c("pink", "cyan"), alpha = rep(0.5, 2),
cat.dist = rep(0.01, 2),ind=F)
# grid.newpage()
grid.arrange(gTree(children=venng),
top=paste(tissue,pair_name), bottom=subt)
}
}